{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c7d9ddf1-11d0-49e9-ad2b-2c9a3a3dd397",
   "metadata": {},
   "source": [
    "### Goals\n",
    "This notebook documents the construction of the annual time series describing the average amount of attention schooling and education got during UK parliamentary debates. \n",
    "* __LDA data and code are available upon request and are needed to execute this notebook.__"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f759736-c3bb-48ab-a27b-635bfc810968",
   "metadata": {},
   "source": [
    "### Library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5a536225-8b71-4c98-83bb-033cf6110629",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36a8b574-2d06-4751-870f-bbd82f05a847",
   "metadata": {},
   "source": [
    "### Import LDA thetas\n",
    "Raw data can be found here: https://www.hansard-archive.parliament.uk/\n",
    "\n",
    "LDA __IS NOT__ in the repository. The data and code are available upon request."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3d1d0d46-d592-42c3-afbc-beea221e8ee0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the LDA thetas file (not in the repository; request it from the author)\n",
    "thetas_path = \"REQUEST FROM AUTHOR\"\n",
    "thetas = pd.read_csv(thetas_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "60497b6d-5e36-42c8-bbfd-7f2bf1fca941",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>speaker</th>\n",
       "      <th>chamber</th>\n",
       "      <th>year</th>\n",
       "      <th>ndigits</th>\n",
       "      <th>length</th>\n",
       "      <th>topic0</th>\n",
       "      <th>topic1</th>\n",
       "      <th>topic2</th>\n",
       "      <th>topic3</th>\n",
       "      <th>...</th>\n",
       "      <th>topic191</th>\n",
       "      <th>topic192</th>\n",
       "      <th>topic193</th>\n",
       "      <th>topic194</th>\n",
       "      <th>topic195</th>\n",
       "      <th>topic196</th>\n",
       "      <th>topic197</th>\n",
       "      <th>topic198</th>\n",
       "      <th>topic199</th>\n",
       "      <th>entropy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Speaker</td>\n",
       "      <td>lower</td>\n",
       "      <td>1803</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>0.03169</td>\n",
       "      <td>0.00352</td>\n",
       "      <td>5.015056</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>Lord Hawkesbury</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>0.00455</td>\n",
       "      <td>5.194007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Lord Chancellor</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.03358</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>0.00373</td>\n",
       "      <td>5.004991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>Lord Walsingham</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>0</td>\n",
       "      <td>67</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.01126</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.02928</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>0.00225</td>\n",
       "      <td>4.397629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Earl of Limerick</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>12</td>\n",
       "      <td>698</td>\n",
       "      <td>0.00182</td>\n",
       "      <td>0.00473</td>\n",
       "      <td>0.00473</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>0.00182</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>0.00764</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>0.00036</td>\n",
       "      <td>3.538782</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 207 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         date               speaker chamber  year  ndigits  length   topic0  \\\n",
       "0  1803-11-22           The Speaker   lower  1803        1      22  0.00352   \n",
       "1  1803-11-22       Lord Hawkesbury   upper  1803        0       5  0.00455   \n",
       "2  1803-11-22   The Lord Chancellor   upper  1803        0      24  0.00373   \n",
       "3  1803-11-22       Lord Walsingham   upper  1803        0      67  0.00225   \n",
       "4  1803-11-22  The Earl of Limerick   upper  1803       12     698  0.00182   \n",
       "\n",
       "    topic1   topic2   topic3  ...  topic191  topic192  topic193  topic194  \\\n",
       "0  0.00352  0.00352  0.00352  ...   0.00352   0.00352   0.00352   0.00352   \n",
       "1  0.00455  0.00455  0.00455  ...   0.00455   0.00455   0.00455   0.00455   \n",
       "2  0.00373  0.00373  0.00373  ...   0.00373   0.00373   0.00373   0.00373   \n",
       "3  0.00225  0.01126  0.00225  ...   0.00225   0.00225   0.00225   0.00225   \n",
       "4  0.00473  0.00473  0.00036  ...   0.00036   0.00036   0.00182   0.00036   \n",
       "\n",
       "   topic195  topic196  topic197  topic198  topic199   entropy  \n",
       "0   0.00352   0.00352   0.00352   0.03169   0.00352  5.015056  \n",
       "1   0.00455   0.00455   0.00455   0.00455   0.00455  5.194007  \n",
       "2   0.00373   0.03358   0.00373   0.00373   0.00373  5.004991  \n",
       "3   0.00225   0.02928   0.00225   0.00225   0.00225  4.397629  \n",
       "4   0.00036   0.00036   0.00764   0.00036   0.00036  3.538782  \n",
       "\n",
       "[5 rows x 207 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the thetas table\n",
    "thetas.head(n=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95d9f007-3a22-4707-8a46-d4a419277f4b",
   "metadata": {},
   "source": [
    "### Inspect terms corresponding to schooling debate\n",
    "The LDA labels data frame covers all topics debated in parliament. It has the topic in column 0, its label in column 1, and the most frequent terms associated with the topic in the remaining columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "26d57b10-11ef-4981-b1f8-b5ebbd3b2f03",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the topic labels file (not in the repository; request it from the author)\n",
    "labels_path = \"REQUEST FROM AUTHOR\"\n",
    "# header=None: the file has no header row, so columns are addressed by integer position\n",
    "labels = pd.read_csv(labels_path, header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3617f127-b8d4-40fa-94df-6a34d4280b25",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>17</th>\n",
       "      <th>18</th>\n",
       "      <th>19</th>\n",
       "      <th>20</th>\n",
       "      <th>21</th>\n",
       "      <th>22</th>\n",
       "      <th>23</th>\n",
       "      <th>24</th>\n",
       "      <th>25</th>\n",
       "      <th>26</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>topic0</td>\n",
       "      <td>Established Freedom &amp; Security</td>\n",
       "      <td>establish</td>\n",
       "      <td>maintain</td>\n",
       "      <td>secur</td>\n",
       "      <td>admit</td>\n",
       "      <td>doctrin</td>\n",
       "      <td>ground</td>\n",
       "      <td>danger</td>\n",
       "      <td>union</td>\n",
       "      <td>...</td>\n",
       "      <td>contend</td>\n",
       "      <td>exclus</td>\n",
       "      <td>equal</td>\n",
       "      <td>claim</td>\n",
       "      <td>institut</td>\n",
       "      <td>distinct</td>\n",
       "      <td>opinion</td>\n",
       "      <td>proposit</td>\n",
       "      <td>essenti</td>\n",
       "      <td>separ</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>topic1</td>\n",
       "      <td>Crime, Conspiracy, Extortion</td>\n",
       "      <td>crime</td>\n",
       "      <td>coercion</td>\n",
       "      <td>peopl</td>\n",
       "      <td>leagu</td>\n",
       "      <td>outrag</td>\n",
       "      <td>agit</td>\n",
       "      <td>intimid</td>\n",
       "      <td>order</td>\n",
       "      <td>...</td>\n",
       "      <td>chief</td>\n",
       "      <td>thing</td>\n",
       "      <td>men</td>\n",
       "      <td>attempt</td>\n",
       "      <td>ordinari</td>\n",
       "      <td>nation_leagu</td>\n",
       "      <td>peac</td>\n",
       "      <td>prevent</td>\n",
       "      <td>illeg</td>\n",
       "      <td>plan_campaign</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>topic2</td>\n",
       "      <td>Loans and Securities</td>\n",
       "      <td>section</td>\n",
       "      <td>loan</td>\n",
       "      <td>advanc</td>\n",
       "      <td>schedul</td>\n",
       "      <td>provid</td>\n",
       "      <td>secur</td>\n",
       "      <td>guarante</td>\n",
       "      <td>rais</td>\n",
       "      <td>...</td>\n",
       "      <td>enabl</td>\n",
       "      <td>includ</td>\n",
       "      <td>authoris</td>\n",
       "      <td>liabil</td>\n",
       "      <td>instal</td>\n",
       "      <td>repay</td>\n",
       "      <td>applic</td>\n",
       "      <td>princip</td>\n",
       "      <td>mention</td>\n",
       "      <td>borrow_monei</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>topic3</td>\n",
       "      <td>topic3</td>\n",
       "      <td>marquess</td>\n",
       "      <td>regret</td>\n",
       "      <td>remark</td>\n",
       "      <td>agre</td>\n",
       "      <td>express</td>\n",
       "      <td>certainli</td>\n",
       "      <td>doubt</td>\n",
       "      <td>opposit</td>\n",
       "      <td>...</td>\n",
       "      <td>extrem</td>\n",
       "      <td>allud</td>\n",
       "      <td>surpris</td>\n",
       "      <td>occas</td>\n",
       "      <td>impress</td>\n",
       "      <td>confess</td>\n",
       "      <td>opportun</td>\n",
       "      <td>assur</td>\n",
       "      <td>kind</td>\n",
       "      <td>gener</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>topic4</td>\n",
       "      <td>topic4</td>\n",
       "      <td>interest</td>\n",
       "      <td>oppos</td>\n",
       "      <td>support</td>\n",
       "      <td>reason</td>\n",
       "      <td>affect</td>\n",
       "      <td>opposit</td>\n",
       "      <td>promot</td>\n",
       "      <td>concern</td>\n",
       "      <td>...</td>\n",
       "      <td>upstair</td>\n",
       "      <td>involv</td>\n",
       "      <td>certainli</td>\n",
       "      <td>strongli</td>\n",
       "      <td>simpli</td>\n",
       "      <td>throw</td>\n",
       "      <td>care</td>\n",
       "      <td>altogeth</td>\n",
       "      <td>strong</td>\n",
       "      <td>awai</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0                                1          2         3        4   \\\n",
       "0  topic0  Established Freedom & Security   establish  maintain    secur   \n",
       "1  topic1    Crime, Conspiracy, Extortion       crime  coercion    peopl   \n",
       "2  topic2             Loans and Securities    section      loan   advanc   \n",
       "3  topic3                           topic3   marquess    regret   remark   \n",
       "4  topic4                           topic4   interest     oppos  support   \n",
       "\n",
       "        5        6          7         8        9   ...       17      18  \\\n",
       "0    admit  doctrin     ground    danger    union  ...  contend  exclus   \n",
       "1    leagu   outrag       agit   intimid    order  ...    chief   thing   \n",
       "2  schedul   provid      secur  guarante     rais  ...    enabl  includ   \n",
       "3     agre  express  certainli     doubt  opposit  ...   extrem   allud   \n",
       "4   reason   affect    opposit    promot  concern  ...  upstair  involv   \n",
       "\n",
       "          19        20        21            22        23        24       25  \\\n",
       "0      equal     claim  institut      distinct   opinion  proposit  essenti   \n",
       "1        men   attempt  ordinari  nation_leagu      peac   prevent    illeg   \n",
       "2   authoris    liabil    instal         repay    applic   princip  mention   \n",
       "3    surpris     occas   impress       confess  opportun     assur     kind   \n",
       "4  certainli  strongli    simpli         throw      care  altogeth   strong   \n",
       "\n",
       "              26  \n",
       "0          separ  \n",
       "1  plan_campaign  \n",
       "2   borrow_monei  \n",
       "3          gener  \n",
       "4           awai  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the labels table\n",
    "labels.head(n=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4837338c-f410-4c41-939b-a37a51d43bb3",
   "metadata": {},
   "source": [
    "These were the terms used to debate schooling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5e3d94fc-a78f-496e-b570-c1b0320545c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>17</th>\n",
       "      <th>18</th>\n",
       "      <th>19</th>\n",
       "      <th>20</th>\n",
       "      <th>21</th>\n",
       "      <th>22</th>\n",
       "      <th>23</th>\n",
       "      <th>24</th>\n",
       "      <th>25</th>\n",
       "      <th>26</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>topic106</td>\n",
       "      <td>State Schooling</td>\n",
       "      <td>school</td>\n",
       "      <td>educ</td>\n",
       "      <td>teacher</td>\n",
       "      <td>children</td>\n",
       "      <td>manag</td>\n",
       "      <td>grant</td>\n",
       "      <td>voluntari_school</td>\n",
       "      <td>nation</td>\n",
       "      <td>...</td>\n",
       "      <td>endow</td>\n",
       "      <td>council</td>\n",
       "      <td>effici</td>\n",
       "      <td>scholar</td>\n",
       "      <td>secondari_educ</td>\n",
       "      <td>elementari_educ</td>\n",
       "      <td>instruct</td>\n",
       "      <td>higher</td>\n",
       "      <td>free</td>\n",
       "      <td>train_colleg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           0                1       2     3        4         5      6      7   \\\n",
       "106  topic106  State Schooling  school  educ  teacher  children  manag  grant   \n",
       "\n",
       "                   8       9   ...     17       18      19       20  \\\n",
       "106  voluntari_school  nation  ...  endow  council  effici  scholar   \n",
       "\n",
       "                 21               22        23      24    25            26  \n",
       "106  secondari_educ  elementari_educ  instruct  higher  free  train_colleg  \n",
       "\n",
       "[1 rows x 27 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the label and most frequent terms for topic106 (column 0 holds the topic id)\n",
    "is_topic106 = labels[0] == 'topic106'\n",
    "labels[is_topic106]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6a685c2c-1432-41c1-8798-f4b7b32614dc",
   "metadata": {},
   "source": [
    "### Create annual series describing average emphasis on schooling in debates\n",
    "We only want topic106, the one about schooling, and we want to average it across all speeches in a given year to get an estimate of the political momentum that year behind schooling."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "67390001-12c2-4122-ad01-ac8aff09cdea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>topic106</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1803</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1804</td>\n",
       "      <td>0.282192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1805</td>\n",
       "      <td>0.469123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1806</td>\n",
       "      <td>0.305268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1807</td>\n",
       "      <td>0.327758</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   year  topic106\n",
       "0  1803       NaN\n",
       "1  1804  0.282192\n",
       "2  1805  0.469123\n",
       "3  1806  0.305268\n",
       "4  1807  0.327758"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Average topic106 over all speeches in each year. The column is selected\n",
    "# before aggregating so the mean is never attempted on the text columns\n",
    "# (date, speaker, chamber), which raises a TypeError in pandas >= 2.0.\n",
    "yr_topic106 = (\n",
    "    thetas.groupby('year')[['topic106']]\n",
    "    .mean()\n",
    "    # Lag by one row; this is a one-year lag only if no year is missing from thetas\n",
    "    .shift(1)\n",
    "    # Transform into percentage (%)\n",
    "    .mul(100)\n",
    "    .reset_index()\n",
    ")\n",
    "yr_topic106.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dcff3665-7fbb-4825-9c83-89250c04f799",
   "metadata": {},
   "source": [
    "### Export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b9f9ece1-53a0-4320-93c6-82473e01a790",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the annual series to disk without the positional index\n",
    "output_path = \"yr_topic106.csv\"\n",
    "yr_topic106.to_csv(output_path, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
